##################
# Angel M. Villegas-Cruz
# This file replicates the regression tables used as robustness checks in the appendix, namely Appendix Tables 3, 4
# 08/18/2024
##################
library(MASS)
library(xtable)
#remotes::install_github("vdeminstitute/vdemdata")
library(vdemdata)
library(systemfit)

#Run "Replication - Main results.R"

##################
#Appendix Table 3
##################

#Make a a continuous variable measuring the percentage of Muslims 
#in the host country’s population
mydata$muslim_pct = mydata$muslim_pct_2020*100

head(mydata$muslim_pct)

#Appendix Table 3 Model 1 is a robustness check that uses a 
#continuous battle-related deaths variable.

#ucdp_bd_best robustness check Model 1
f1 <- glm(threat ~ (ucdp_bd_best) +
            elec_demo + ln_gdppc +
            agree + ln_trade_share_i +
            ln_followers_count + ln_statuses_count + 
            ln_friends_count + xinjiang_num + factor(month),
          data = mydata, family = "binomial")

#Appendix Table 3 Model 2 is a robustness check using a continuous variable 
#measuring the percentage of
#Muslims in the host country’s population.

#muslim pct robustness check Model 2
f2 <- glm(bene_rule ~ muslim_pct + 
            elec_demo + ln_gdppc + 
            agree + ln_trade_share_i +
            ln_followers_count + ln_statuses_count + 
            ln_friends_count + xinjiang_num + factor(month),
          data = mydata, family = "binomial")

#Calculating robust standard errors
cov1        <- sandwich::vcovHC(f1, type = "HC1")
robust_se1    <- sqrt(abs(diag(cov1)))
cov2       <- sandwich::vcovHC(f2, type = "HC1")
robust_se2    <- sqrt(abs(diag(cov2)))

#Creating Appendix Table 3: Robustness check with continuous variables for battle-related deaths and Muslim countries
stargazer(f1,f2,type='text', out="appexid_table3.html",
          title="",
          dep.var.caption = "Repression image management strategies",
          dep.var.labels = c("Threat","Benevolent rule"),
          covariate.labels = c("Battle-related deaths",
                               "Muslim (pop. %)",
                               "Electoral democracy",
                               "Log(GDP p.c.)",
                               "UN voting with China",
                               "Log(Trade share with China)",
                               "Log(N. of followers)",
                               "Log(N. of tweets)",
                               "Log(N. of friends)",
                               "N. of tweets about Xinjiang"),
          omit.stat = "f",
          se        = list(robust_se1,robust_se2), 
          add.lines = list(c("Month fixed-effects", "Yes","Yes")),
          column.sep.width = "-15pt",  font.size = "normalsize",digits=2)

#Note: Since they are too long, the main manuscript does not include all the month binary variables (i.e., month fixed effects).

#Open Appendix Table 3 in browser
BROWSE("appexid_table3.html")

##################
#Appendix Table 4
##################

#This is a robustness check aggregating the dependent variables by month and using count models.
#As suggested by reviewer, I use negative binomial regressions to re-test the hypotheses

#select variables I want
x = mydata %>% dplyr::select("screen_name", "year","month",
                             "threat","bene_rule",
                             "ucdp_civil_war",
                             "muslim", "muslim_pct",
                             "elec_demo","ln_gdppc","agree","ln_trade_share_i",
                             "ln_followers_count","ln_statuses_count",
                             "ln_friends_count","xinjiang_num")

#collapse data by month (average)
x <- x %>% group_by(screen_name,month) %>% mutate_if(is.numeric, mean, na.rm = TRUE)
x <- x %>% distinct(month, screen_name, .keep_all = TRUE) #remove duplicates

#collapse data by month (sum)
x2 <- mydata %>% group_by(screen_name,month) %>% mutate_if(is.numeric, sum, na.rm = TRUE)
x3 <- x2 %>% distinct(month, screen_name, .keep_all = TRUE) #remove duplicates
#This is to obtain a count variable for the repression image management strategies
x$threat = x3$threat
x$bene_rule = x3$bene_rule
x$xinjiang_num = x3$xinjiang_num

#dependent variable is no longer a binary variable
summary(x$threat)
var(x$threat)
summary(x$bene_rule)
var(x$bene_rule)

#Conducting negative binomial model 1
f1 <-  glm.nb(threat ~ ucdp_civil_war + muslim + elec_demo + ln_gdppc +
                agree + ln_trade_share_i +
                ln_followers_count + ln_statuses_count + 
                ln_friends_count + xinjiang_num + factor(month),
              data = x)

#Conducting negative binomial model 1
f2 <-  glm.nb(bene_rule ~ ucdp_civil_war + muslim + elec_demo + ln_gdppc +
                agree + ln_trade_share_i +
                ln_followers_count + ln_statuses_count + 
                ln_friends_count + xinjiang_num + factor(month),
              data = x)

#Calculating robust standard errors
cov1        <- sandwich::vcovHC(f1, type = "HC1")
robust_se1    <- sqrt(abs(diag(cov1)))
cov2       <- sandwich::vcovHC(f2, type = "HC1")
robust_se2    <- sqrt(abs(diag(cov2)))

#Creating Appendix Table 4: Negative binomial regression models estimating the effects of recipient characteristics on repression image management strategies (Monthly data)
stargazer(f1,f2,type='text',  out="appexid_table4.html",
          title="",
          dep.var.caption = "Repression image management strategies",
          covariate.labels = c("Civil war",
                               "Muslim-majority",
                               "Electoral democracy",
                               "Log(GDP p.c.)",
                               "UN voting with China",
                               "Log(Trade share with China)",
                               "Log(N. of followers)",
                               "Log(N. of tweets)",
                               "Log(N. of friends)",
                               "N. of tweets about Xinjiang"),
          dep.var.labels = c("Threat","Benevolent rule"),
          omit.stat = "f",
          se        = list(robust_se1,robust_se2), 
          add.lines = list(c("Month fixed-effects", "Yes","Yes")),
          column.sep.width = "-15pt",  font.size = "normalsize",digits=2)

#Note: Since they are too long, the main manuscript does not include all the month binary variables (i.e., month fixed effects).

#Open Appendix Table 4 in browser
BROWSE("appexid_table4.html")

##################
#Appendix Table 5
##################

colnames(mydata)

head(mydata$Country) #country
head(mydata$screen_name) # twitter handdle
head(mydata$short_name) # diplomatic mission

x = mydata %>% dplyr::select(short_name, screen_name, Country)

x = x %>% distinct(Country, screen_name, .keep_all = TRUE) #remove duplicates
#embassy_chinese is Tonga

#Creating Appendix Table 5: Chinese diplomatic missions using Twitter by country in the dataset
xtable(x)

##################
#Appendix Table 6
##################

#As suggested by reviewer, this is a robustness check using V-Dem's Electoral democracy index rather than Freedom House's Electoral Index.

#v2x_polyarchy == Electoral democracy index (Continuous)

head(vdem$v2x_polyarchy)
table(vdem$v2x_polyarchy)

x = vdem %>% dplyr::select(country_name, year, v2x_polyarchy) #keep relevant columns
x$COWcode <- countrycode(x$country_name, origin = "country.name", destination = "cown") 
#Have to manually give code to Serbia because of Yugoslavia years
x$COWcode <- ifelse(x$country_name == "Serbia", "345", x$COWcode)

x = subset(x, year > 2013) #subset to my time frame

mydata$COWcode <- countrycode(mydata$Country, origin = "country.name", destination = "cown") 
mydata$COWcode <- ifelse(mydata$Country == "Serbia", "345", mydata$COWcode)

#Merge the two datasets (namely my data and V-Dem data)
x = merge.data.frame(mydata, x, by = c("COWcode","year"), all.x = T)

#model 1
f1 <- glm(threat ~ ucdp_civil_war + muslim + v2x_polyarchy +
            ln_gdppc +
            agree + ln_trade_share_i +
            ln_followers_count + ln_statuses_count + 
            ln_friends_count + xinjiang_num + factor(month),
          data = x, family = "binomial")
#model 2
f2 <- glm(bene_rule ~ ucdp_civil_war + muslim  + v2x_polyarchy +
            ln_gdppc + agree + ln_trade_share_i +
            ln_followers_count + ln_statuses_count + 
            ln_friends_count + xinjiang_num + factor(month),
          data = x, family = "binomial")

#Calculating robust standard errors
cov1        <- sandwich::vcovHC(f1, type = "HC1")
robust_se1    <- sqrt(abs(diag(cov1)))
cov2       <- sandwich::vcovHC(f2, type = "HC1")
robust_se2    <- sqrt(abs(diag(cov2)))

#Creating #Appendix Table 6: Logistic regression models estimating the effects of recipient characteristics on repression image management strategies (with V-Dem’s Electoral Democracy Index)
stargazer(f1,f2,type='text', out="appexid_table6.html",
          title="",
          dep.var.caption = "Repression image management strategies",
          dep.var.labels = c("Threat","Benevolent rule"),
          covariate.labels = c("Civil war",
                               "Muslim-majority",
                               "Electoral regime (V-Dem)",
                               "Log(GDP p.c.)",
                               "UN voting with China",
                               "Log(Trade share with China)",
                               "Log(N. of followers)",
                               "Log(N. of tweets)",
                               "Log(N. of friends)",
                               "N. of tweets about Xinjiang"),
          omit.stat = "f",
          se        = list(robust_se1,robust_se2), 
          add.lines = list(c("Month fixed-effects", "Yes","Yes")),
          column.sep.width = "-15pt",  font.size = "normalsize",digits=2)

#Note: Since they are too long, the main manuscript does not include all the month binary variables (i.e., month fixed effects).

#Open Appendix Table 6 in browser
BROWSE("appexid_table6.html")

##################
#Appendix Table 7
##################

#As suggested by reviewer, I use seemingly unrelated regression models to re-test the hypotheses

#Defining the equations
r1= threat ~ ucdp_civil_war + muslim + elec_demo +
  ln_gdppc +
  agree + ln_trade_share_i +
  ln_followers_count + ln_statuses_count + 
  ln_friends_count + xinjiang_num + factor(month)

r2 = bene_rule ~ ucdp_civil_war + muslim  + elec_demo +
  ln_gdppc + agree + ln_trade_share_i +
  ln_followers_count + ln_statuses_count + 
  ln_friends_count + xinjiang_num + factor(month)

# A summary of the systemfit first shows a summary of the system, then the separate equations, and then how the residuals of the two equations are related. 
#These are followed by the OLS fits of the separate equations
fitsur <- systemfit(list(readreg = r1, mathreg = r2), data=mydata)

#Creating Appendix Table 7: Seemingly unrelated regression models estimating the effects of recipient characteristics on repression image management strategies
summary(fitsur)

#Note: Since they are too long, the main manuscript does not include all the month binary variables (i.e., month fixed effects).

#Stargazer or xtable do not seem to work with this package
#Transferred results from statistical software to the manuscript manually

##################
#Appendix Table 8
##################

#As suggested by reveiwrers, I run some regression models including the strategy that is not applied as an independent variable in the analyses

#Conducting logistic regression model 1
f1 <- glm(threat ~ bene_rule + ucdp_civil_war + muslim + elec_demo +
            ln_gdppc +
            agree + ln_trade_share_i +
            ln_followers_count + ln_statuses_count + 
            ln_friends_count + xinjiang_num + factor(month),
          data = mydata, family = "binomial")

#Conducting logistic regression model 2
f2 <- glm(bene_rule ~ threat + ucdp_civil_war + muslim  + elec_demo +
            ln_gdppc + agree + ln_trade_share_i +
            ln_followers_count + ln_statuses_count + 
            ln_friends_count + xinjiang_num + factor(month),
          data = mydata, family = "binomial")

#Calculating robust standard errors
cov1        <- sandwich::vcovHC(f1, type = "HC1")
robust_se1    <- sqrt(abs(diag(cov1)))
cov2       <- sandwich::vcovHC(f2, type = "HC1")
robust_se2    <- sqrt(abs(diag(cov2)))

#Creating Appendix Table 8: Logistic regression models estimating the effects of recipient characteristics on repression image management strategies (Controlling for diverse strategies)
stargazer(f1,f2,type='text', out="appexid_table8.html",
          title="",
          dep.var.caption = "Repression image management strategies",
          dep.var.labels = c("Threat","Benevolent rule"),
          covariate.labels = c("Benevolent rule strategy",
                               "Threat strategy",
                               "Civil war",
                               "Muslim-majority",
                               "Electoral democracy",
                               "Log(GDP p.c.)",
                               "UN voting with China",
                               "Log(Trade share with China)",
                               "Log(N. of followers)",
                               "Log(N. of tweets)",
                               "Log(N. of friends)",
                               "N. of tweets about Xinjiang"),
          omit.stat = "f",
          se        = list(robust_se1,robust_se2), 
          add.lines = list(c("Month fixed-effects", "Yes","Yes")),
          column.sep.width = "-15pt",  font.size = "normalsize",digits=2)

#Note: Since they are too long, the main manuscript does not include all the month binary variables (i.e., month fixed effects).

#Open Appendix Table 8 in browser
BROWSE("appendix_table8.html")
